/*
Date: October 2025
Project: Income and Child Maltreatment: Evidence from a Discontinuity in Tax Benefits
Author: Katherine Rittenhouse
Purpose: This file appends annual birth record files and standardizes variable names / cleans the data in order to merge for analysis.
Files in: BSMF99 - BSMF19
Files out: Births_99_19
*/

clear all
set more off


* Stack the annual birth extracts (1999 first, then 2000-2019) into one file.
* For each year: load the extract, drop variables that are missing for every
* record, convert all variables to string so storage types cannot clash across
* years, put the newly loaded year on top of the years already saved, and save.
local stacked "Births_99_19.dta"
local have_stack = 0
foreach yy in 99 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16 17 18 19 {
	use "BSMF`yy'.DTA", clear
	foreach v of varlist _all {
		* assert succeeds (return code 0) only when the variable is missing everywhere
		capture assert missing(`v')
		if _rc == 0 {
			drop `v'
		}
	}
	tostring _all, force replace
	* the first year has nothing to append to yet
	if `have_stack' {
		append using "`stacked'"
	}
	save "`stacked'", replace
	local have_stack = 1
}


*cleaning births 99-19

use "Births_99_19.dta",clear

*creating useful variables, standardizing variable names across years

*birth date
* Builds numeric birth_y / birth_m / birth_d and the Stata date bdate_mdy from
* two string sources, bthdate and bcdob. The layout is chosen from the string
* length alone. All variables are strings at this point (tostring in the append step).
gen byte length = strlen(bthdate)
gen birth_y = ""
gen birth_m = ""
gen birth_d = ""
* bthdate, 10 characters: month in 1-2, day in 4-5, year in 7-10
* (presumably MM/DD/YYYY with one separator character at 3 and 6 - confirm)
replace birth_y = substr(bthdate,7,4) if length == 10
replace birth_m = substr(bthdate,1,2) if length == 10
replace birth_d = substr(bthdate,4,2) if length == 10

* bthdate, 8 characters: year in 1-4, month in 5-6, day in 7-8
replace birth_y = substr(bthdate,1,4) if length == 8
replace birth_m = substr(bthdate,5,2) if length == 8
replace birth_d = substr(bthdate,7,2) if length == 8


* From here on length holds the length of bcdob, and every replace below
* overwrites whatever was parsed from bthdate on the rows it matches.
* NOTE(review): this assumes a record never has a usable bthdate and a
* non-empty bcdob of one of these lengths at the same time - confirm.
replace length = strlen(bcdob)
* bcdob, 8 characters: year in 1-4, month in 5-6, day in 7-8
replace birth_y = substr(bcdob,1,4) if length == 8
replace birth_m = substr(bcdob,5,2) if length == 8
replace birth_d = substr(bcdob,7,2) if length == 8

* bcdob, 6 characters: year is hard-coded to 2010; month in 3-4, day in 5-6
* NOTE(review): any 6-character bcdob is assigned 2010 regardless of its first
* two characters - confirm no other year can appear at this length.
replace birth_y = "2010" if length == 6
replace birth_m = substr(bcdob,3,2) if length == 6
replace birth_d = substr(bcdob,5,2) if length == 6

* bcdob, 5 characters: first character is the last digit of a 200x year;
* month in 2-3, day in 4-5
replace birth_y = "200" + substr(bcdob,1,1) if length == 5
replace birth_m = substr(bcdob,2,2) if length == 5
replace birth_d = substr(bcdob,4,2) if length == 5

* bcdob, 4 or 3 characters: year is hard-coded to 2000; 4 characters gives a
* two-digit month then day, 3 characters gives a one-digit month then day
* (presumably leading zeros were lost upstream - confirm)
replace birth_y = "2000" if length == 4 | length == 3
replace birth_m = substr(bcdob,1,2) if length == 4
replace birth_d = substr(bcdob,3,2) if length == 4
replace birth_m = substr(bcdob,1,1) if length == 3
replace birth_d = substr(bcdob,2,2) if length == 3

* bcdob, 7 characters: year is hard-coded to 1999; month in 4-5, day in 6-7
* NOTE(review): the first three characters are ignored - confirm the raw layout.
replace birth_y = "1999" if length == 7
replace birth_m = substr(bcdob,4,2) if length == 7
replace birth_d = substr(bcdob,6,2) if length == 7

* force turns any non-numeric piece into missing; mdy() then returns missing
* for records whose date could not be parsed
destring birth_m birth_y birth_d,force replace
gen bdate_mdy = mdy(birth_m,birth_d,birth_y)


*foreign-born mother
* mom_fb = 1 when the mother's birthplace code marks her as born outside the US,
* mom_mx = 1 when it marks Mexico. The coding scheme and the source variable
* change by birth year, so each era has its own rule. Both default to 0.
*
* Fix: Stata treats a numeric missing value as larger than any number, so an
* open-ended guard such as birth_y>=2018 is also true when birth_y is missing.
* Records whose birth date could not be parsed were therefore run through the
* 2018-19 rules. Those guards now require a non-missing birth_y, so undated
* records keep the default 0 exactly as they do under the earlier-era rules.
gen mom_fb = 0

*99-07, two letter state/country codes
replace mom_fb = 1 if birth_y<2008 & inlist(bmbthstate,"CH","JA","VN","PI","CN","CU","MX","RE")

*08-12 - US states given 3-digit nums, everywhere else is coded
* NOTE(review): an empty bmbthstate also fails the starts-with-1 test and is
* counted as foreign-born here - confirm that is intended.
replace mom_fb = 1 if birth_y>=2008 & birth_y<2013 & substr(bmbthstate,1,1)!="1"

*13-17 - same as above but different variable name
replace mom_fb = 1 if birth_y>=2013 & birth_y<2018 & substr(bthresmb,1,1)!="1"

*18-19 - any code that is not exactly two characters, plus MX / CN / PI
replace mom_fb = 1 if birth_y>=2018 & !missing(birth_y) & (strlen(bthresmb)!=2 | inlist(bthresmb,"MX","CN","PI"))


gen mom_mx = 0
*99-12 use bmbthstate, 13-17 use bthresmb
replace mom_mx = 1 if birth_y<2013 & bmbthstate=="MX"
replace mom_mx = 1 if birth_y>=2013 & birth_y<2018 & bthresmb=="MX"
*18-19 - code ends in MX (this also covers the code being exactly MX)
replace mom_mx = 1 if birth_y>=2018 & !missing(birth_y) & substr(bthresmb,-2,2)=="MX"

*wic 2007 on
* wic = 1 when either source variable (mfood or bmwic) is "Y".
* NOTE(review): the heading says 2007 on, but the flag is blanked for every
* birth year before 2008 - confirm which cutoff is intended.
gen byte wic = (mfood=="Y")
replace wic = 1 if bmwic=="Y"
replace wic = . if birth_y<2008

*labor/delivery paid for by medicaid / private insurance / self pay
* The payer code is read from three source variables (paymsold, bmpaydel, bmdelpay).
* Fix: the Medicaid flag accepted both the bare and the zero-padded code in every
* source, but the private-insurance and self-pay flags accepted the zero-padded
* code only in paymsold. A zero-padded 07 or 09 in bmpaydel / bmdelpay was then
* classified as ld_other. All three flags now accept both spellings everywhere.
gen byte ld_medicaid = inlist(paymsold,"2","02") | inlist(bmpaydel,"2","02") | inlist(bmdelpay,"2","02")

gen byte ld_privateins = inlist(paymsold,"7","07") | inlist(bmpaydel,"7","07") | inlist(bmdelpay,"7","07")

gen byte ld_selfpay = inlist(paymsold,"9","09") | inlist(bmpaydel,"9","09") | inlist(bmdelpay,"9","09")

* everything else, including records with no payer code at all
gen byte ld_other = (ld_medicaid==0 & ld_privateins==0 & ld_selfpay==0)



*prenatal care paid for by medicaid
gen byte pc_medicaid = inlist(paymsopc,"2","02") | inlist(bmpaypren,"2","02")

*likely on medicaid within past year
gen medicaid = (ld_medicaid==1|pc_medicaid==1)


* ethnicity
* mhispanic / fhispanic: 0 when the origin code is "1" or "N", missing when it
* is "9" or "U", and 1 for every other value (including a code that is still blank).
gen byte mhispanic = 1
gen byte fhispanic = 1

* Blank origin codes are filled from bmhisporg before classifying.
* NOTE(review): the father's code is back-filled from bmhisporg as well, which
* by its name looks like the mother's field - confirm whether a father-specific
* variable was intended.
foreach p in m f {
	replace `p'sporig = bmhisporg if `p'sporig==""
}

foreach p in m f {
	replace `p'hispanic = 0 if inlist(`p'sporig,"1","N")
	replace `p'hispanic = . if inlist(`p'sporig,"9","U")
}

*race
*bmrace, bfrace - 1999-2006
*bmrace1-3; bfrace1-3 - 2000-2012
*bfmrace, bmmrace - 2000-2012
*mracecode1-3; fracecode1-3 - 2013-2019
*fracecodem, mracecodem - 2013 - 2019 - MULTIRACE CODE
* Every variable with race in its name is converted to numeric here; the
* father's race section further down relies on this conversion too.
destring *race*,force replace

* Mother's race indicators. A flag is 1 when any of the seven source fields
* carries the matching code, and is reset to missing when bmrace, bmrace1 or
* mracecode1 holds a (non-missing) code above 90.
local mom_src bmrace bmrace1 bmrace2 bmrace3 mracecode1 mracecode2 mracecode3
local mom_unknown "(bmrace>90 & bmrace!=.) | (bmrace1>90 & bmrace1!=.) | (mracecode1>90 & mracecode1!=.)"

* groups identified by a single code
local code_black 20
local code_white 10
local code_ai 30
foreach grp in black white ai {
	gen byte mrace`grp' = 0
	foreach src of local mom_src {
		replace mrace`grp' = 1 if `src'==`code_`grp''
	}
	replace mrace`grp' = . if `mom_unknown'
}

* groups identified by a code range: lower bound inclusive, upper bound exclusive
local lo_asian 40
local hi_asian 54
local lo_pi 54
local hi_pi 60
foreach grp in asian pi {
	gen byte mrace`grp' = 0
	foreach src of local mom_src {
		replace mrace`grp' = 1 if `src'>=`lo_`grp'' & `src'<`hi_`grp''
	}
	replace mrace`grp' = . if `mom_unknown'
}

*single race var - 1 white; 2 black; 3 ai; 4 asian/pi; 5 hisp; 9 missing
* Later lines override earlier ones, so Hispanic takes precedence over any race
* code. Only the first-listed race fields are consulted.
gen momrace = 9
local cat_10 1
local cat_20 2
local cat_30 3
foreach code in 10 20 30 {
	replace momrace = `cat_`code'' if bmrace==`code' | bmrace1==`code' | mracecode1==`code'
}
replace momrace = 4 if (bmrace>=40 & bmrace<60) | (bmrace1>=40 & bmrace1<60) | (mracecode1>=40 & mracecode1<60)
replace momrace = 5 if mhispanic==1

*father's race
* Same construction as the mother's indicators, using the father's fields.
* These fields are expected to be numeric already (converted by the earlier
* destring of all race variables).
* A flag is 1 when any of the seven source fields carries the matching code, and
* is reset to missing when bfrace, bfrace1 or fracecode1 holds a (non-missing)
* code above 90.
local dad_src bfrace bfrace1 bfrace2 bfrace3 fracecode1 fracecode2 fracecode3
local dad_unknown "(bfrace>90 & bfrace!=.) | (bfrace1>90 & bfrace1!=.) | (fracecode1>90 & fracecode1!=.)"

* groups identified by a single code
local code_black 20
local code_white 10
local code_ai 30
foreach grp in black white ai {
	gen byte frace`grp' = 0
	foreach src of local dad_src {
		replace frace`grp' = 1 if `src'==`code_`grp''
	}
	replace frace`grp' = . if `dad_unknown'
}

* groups identified by a code range: lower bound inclusive, upper bound exclusive
local lo_asian 40
local hi_asian 54
local lo_pi 54
local hi_pi 60
foreach grp in asian pi {
	gen byte frace`grp' = 0
	foreach src of local dad_src {
		replace frace`grp' = 1 if `src'>=`lo_`grp'' & `src'<`hi_`grp''
	}
	replace frace`grp' = . if `dad_unknown'
}


*single race var - 1 white; 2 black; 3 ai; 4 asian/pi; 5 hisp; 9 missing
* Later lines override earlier ones, so Hispanic takes precedence over any race
* code. Only the first-listed race fields are consulted.
gen dadrace = 9
local cat_10 1
local cat_20 2
local cat_30 3
foreach code in 10 20 30 {
	replace dadrace = `cat_`code'' if bfrace==`code' | bfrace1==`code' | fracecode1==`code'
}
replace dadrace = 4 if (bfrace>=40 & bfrace<60) | (bfrace1>=40 & bfrace1<60) | (fracecode1>=40 & fracecode1<60)
replace dadrace = 5 if fhispanic==1


*education high school
* momeduc / popeduc: 1 - hs degree or less; 2 - some college +; 9 - missing
* Three source codings are applied in turn, later ones overriding earlier ones:
*   meduc / feduc   (2013-2019): 1-3 -> 1, 4 and up except 9 -> 2, 9 -> 9
*   bmeduc / bfeduc, birth year after 2005: 1-3 -> 1, 4-8 -> 2, 9 -> 9
*   bmeduc / bfeduc, birth year 2005 or earlier: up to 12 -> 1, 13-98 -> 2, 99 -> 9
* Anything still unclassified at the end is set to 9.
*
* Fix: the mother's some-college rule tested meduc>=4 & meduc!=9 without
* excluding missing values. Stata treats missing as larger than any number, so
* every record with no meduc was coded 2, and stayed 2 whenever bmeduc was also
* missing instead of falling through to 9. The father's rule already had the
* guard. Both parents now go through the same rules.
destring meduc bmeduc feduc bfeduc bceduc,force replace
gen byte momeduc = .
gen byte popeduc = .
*in 2004 bfeduc is miscoded as bceduc
replace bfeduc = bceduc if birth_y==2004

local target_m momeduc
local target_f popeduc
foreach p in m f {
	local out `target_`p''

	* meduc / feduc - 2013-2019
	replace `out' = 1 if `p'educ<=3
	replace `out' = 2 if `p'educ>=4 & `p'educ!=9 & !missing(`p'educ)
	replace `out' = 9 if `p'educ==9

	* bmeduc / bfeduc - 2006-2012
	replace `out' = 1 if birth_y>2005 & b`p'educ<=3
	replace `out' = 2 if birth_y>2005 & b`p'educ>=4 & b`p'educ<9
	replace `out' = 9 if birth_y>2005 & b`p'educ==9

	* bmeduc / bfeduc - 1999-2005
	replace `out' = 1 if birth_y<=2005 & b`p'educ<=12
	replace `out' = 2 if birth_y<=2005 & b`p'educ>12 & b`p'educ<99
	replace `out' = 9 if birth_y<=2005 & b`p'educ==99

	* no usable education code in any source
	replace `out' = 9 if missing(`out')
}

*apgar score 5m
* Taken from ascore5m, except that births before 2013 use bcapg5. The score is
* then made numeric and blanked for birth years before 2007 and for values
* above 10 (variable name spelled out in full rather than abbreviated).
gen apgar5 = ascore5m
replace apgar5 = bcapg5 if birth_y<2013
destring apgar5,force replace
replace apgar5 = . if birth_y<2007 | apgar5>10

*sex of baby
* sex is coded either 1/2 or M/F; bcsex is coded 1/2. A record matching
* neither pattern gets 0 on both flags.
gen byte male = inlist(sex,"1","M") | bcsex=="1"
gen byte female = inlist(sex,"2","F") | bcsex=="2"

* delivery method (vaginal/cesarean)
* Method codes come from mdel or bmdelmeth: codes below 3 are treated as
* cesarean, codes 3-6 as vaginal, and code 3 as spontaneous vaginal.
gen byte del_vaginal = 0
gen byte del_ces = 0
destring mdel bmdelmeth,force replace

replace del_vaginal = 1 if (mdel>2 & mdel<7) | (bmdelmeth>2 & bmdelmeth<7)
replace del_ces = 1 if mdel<3 | bmdelmeth<3

* Records with no usable method code get missing on BOTH flags.
* Fix: the original tested del_vaginal==0 & del_ces==0 twice in a row; the
* first replace had already turned del_vaginal into missing on those rows, so
* the second test could never be true and del_ces stayed 0. del_vaginal is
* missing only on the rows blanked here, so it identifies them for del_ces.
replace del_vaginal = . if del_vaginal==0 & del_ces==0
replace del_ces = . if missing(del_vaginal)

gen del_vagspon = (mdel==3 | bmdelmeth==3)

*high risk - breech, infection, multiple birth pregnancy
* Flagged when either fetal-presentation field is "3" (breech - only after
* 2005), or when either birth-type field is non-empty and not "1" (multiple birth).
* NOTE(review): the heading mentions infection but no infection field is used.
gen highrisk = (bmfetpres=="3" | fetlbth=="3" | !inlist(bthtype,"1","") | !inlist(bctype,"1",""))


* convert the remaining source fields of this section to numeric in one pass
destring gest bmgestlen prevsts bmprenv precare bmprenbeg, force replace

*gestation length
* gest first, bmgestlen as fallback; 0 and anything above 350 are blanked
gen int gestation_days = gest
replace gestation_days = bmgestlen if missing(gestation_days)
replace gestation_days = . if gestation_days>350 | gestation_days==0


* number of prenatal care visits (bmprenvst)
* prevsts first, bmprenv as fallback; 99 and above are blanked
gen int numprenvis = prevsts
replace numprenvis = bmprenv if missing(numprenvis)
replace numprenvis = . if numprenvis>=99


* month prenatal care began (0-9)
* precare first, bmprenbeg as fallback
gen byte prencaremonth = precare
replace prencaremonth = bmprenbeg if missing(prencaremonth)

*age of mother
* Reported age comes from mage, with bmage as fallback; 99 is blanked.
destring mage bmage,force replace
gen byte momage = mage
replace momage=bmage if momage==.
replace momage=. if momage==99

* Mother's date of birth from mbthdate, used to compute age for 2018-19 births.
gen mbd_y = ""
gen mbd_m=""
gen mbd_d=""

* 2013-2017: year in characters 1-4, month in 5-6, day in 7-8
replace mbd_y = substr(mbthdate,1,4) if birth_y>2012 & birth_y<2018
replace mbd_m = substr(mbthdate,5,2) if birth_y>2012 & birth_y<2018
replace mbd_d = substr(mbthdate,7,2) if birth_y>2012 & birth_y<2018

* 2018 on: month in characters 1-2, day in 4-5, year in 7-10
replace mbd_y = substr(mbthdate,7,4) if birth_y>2017 & birth_y!=.
replace mbd_m = substr(mbthdate,1,2) if birth_y>2017 & birth_y!=.
replace mbd_d = substr(mbthdate,4,2) if birth_y>2017 & birth_y!=.

destring mbd_y mbd_m mbd_d,force replace
gen mbd_mdy = mdy(mbd_m,mbd_d,mbd_y)

* For 2018-19 births the computed age replaces the reported one, including
* replacing it with missing when either date is unavailable.
* NOTE(review): dividing by 365 ignores leap days, so the computed age can be
* one year too high in the days just before a birthday - confirm acceptable.
gen calcage = floor((bdate_mdy-mbd_mdy)/365)
replace momage= calcage if birth_y>2017 & birth_y!=.
replace momage=. if birth_y>2017 & birth_y!=. & calcage==.


* Age bins: 1 under 23; 2 23-27; 3 28-33; 4 34-39; 5 40 and over; 9 age missing.
* Fix: the original bin 3 stopped below 33 while bin 4 started at 34, so
* mothers aged exactly 33 matched no bin and were left in the missing category 9.
* The neighbouring bins use inclusive upper bounds, so 33 is placed in bin 3.
* Bounds are written so that consecutive bins share an edge and cannot leave a gap.
gen momagebin = 9
replace momagebin = 1 if momage<23
replace momagebin = 2 if momage>=23 & momage<28
replace momagebin = 3 if momage>=28 & momage<34
replace momagebin = 4 if momage>=34 & momage<40
replace momagebin = 5 if momage>=40 & momage!=.

*age of father
* Reported age comes from fage, with bfage as fallback; 99 is blanked.
destring fage bfage,force replace
gen byte dadage = fage
replace dadage=bfage if dadage==.
replace dadage=. if dadage==99


* Father's date of birth from fbthdate, used to compute age for 2018-19 births.
gen fbd_y = ""
gen fbd_m=""
gen fbd_d=""

* 2013-2017: year in characters 1-4, month in 5-6, day in 7-8
replace fbd_y = substr(fbthdate,1,4) if birth_y>2012 & birth_y<2018
replace fbd_m = substr(fbthdate,5,2) if birth_y>2012 & birth_y<2018
replace fbd_d = substr(fbthdate,7,2) if birth_y>2012 & birth_y<2018

* 2018 on: month in characters 1-2, day in 4-5, year in 7-10
replace fbd_y = substr(fbthdate,7,4) if birth_y>2017 & birth_y!=.
replace fbd_m = substr(fbthdate,1,2) if birth_y>2017 & birth_y!=.
replace fbd_d = substr(fbthdate,4,2) if birth_y>2017 & birth_y!=.

destring fbd_y fbd_m fbd_d,force replace
gen fbd_mdy = mdy(fbd_m,fbd_d,fbd_y)

* For 2018-19 births the computed age replaces the reported one, including
* replacing it with missing when either date is unavailable.
* NOTE(review): dividing by 365 ignores leap days, so the computed age can be
* one year too high in the days just before a birthday - confirm acceptable.
gen calcage2 = floor((bdate_mdy-fbd_mdy)/365)
replace dadage= calcage2 if birth_y>2017 & birth_y!=.
replace dadage=. if birth_y>2017 & birth_y!=. & calcage2==.


* Age bins: 1 under 23; 2 23-27; 3 28-33; 4 34-39; 5 40 and over; 9 age missing.
* Fix: the original bin 3 stopped below 33 while bin 4 started at 34, so
* fathers aged exactly 33 matched no bin and were left in the missing category 9.
* The neighbouring bins use inclusive upper bounds, so 33 is placed in bin 3.
* Bounds are written so that consecutive bins share an edge and cannot leave a gap.
gen dadagebin = 9
replace dadagebin = 1 if dadage<23
replace dadagebin = 2 if dadage>=23 & dadage<28
replace dadagebin = 3 if dadage>=28 & dadage<34
replace dadagebin = 4 if dadage>=34 & dadage<40
replace dadagebin = 5 if dadage>=40 & dadage!=.

* child order
* Births after 2012 use prevlbl + 1 and births through 2012 use bmbthlv + 1;
* 1999 births are then overridden with bmchildbornlv minus bmbthdead.
* Values above 20 are blanked.
* NOTE(review): birth_y>2012 is also true when birth_y is missing, so undated
* records take the prevlbl rule - confirm that is acceptable.
destring parity prevlbl bmbthlv bmchildbornlv bmbthdead,force replace
gen byte childorder = .
replace childorder = prevlbl + 1 if birth_y>2012
replace childorder = bmbthlv + 1 if birth_y<=2012
replace childorder = bmchildbornlv-bmbthdead if birth_y==1999
replace childorder = . if childorder>20

*drop if child dies
* A record is removed when either death-date field holds anything other than
* an empty string or ".", and also when the computed child order is 0.
drop if !inlist(bcdod,"",".") | !inlist(cdthdate,"",".") | childorder==0

*birthweight
* First non-missing of bthweight, bcwt, bcwt_ (in that order).
destring bthweight bcwt bcwt_,force replace
gen long birthweight = bthweight
foreach alt in bcwt bcwt_ {
	replace birthweight = `alt' if missing(birthweight)
}

* Mutually exclusive weight bands; a missing birthweight gives 0 on all three.
gen byte lbw = (birthweight>=1500 & birthweight<2500)
gen byte vlbw = (birthweight>=1000 & birthweight<1500)
gen byte elbw = (birthweight<1000)
*trim
* Drop mothers younger than 12 or older than 44 and child order above 10;
* records where the value is missing are kept.
drop if momage<12 | (momage>44 & !missing(momage)) | (childorder>10 & !missing(childorder))

*preterm
* Gestation below 259 days / below 238 days; missing gestation gives 0
* (variable name spelled out in full rather than abbreviated).
gen byte preterm_37 = (gestation_days<259 & !missing(gestation_days))

gen byte preterm_34 = (gestation_days<238 & !missing(gestation_days))


*for merge with linkage
* Builds stateid and localid and combines them with the birth year into bid.
* Each id starts from its bc* field and is replaced by the newer field for
* birth years after 2012. A 13-character id is cut down to characters 8-13;
* an id shorter than 6 characters (including an empty one) is left-padded with
* zeros to 6. Ids of any other length are left as they are.
local newsrc_state bthsfn
local newsrc_local bthregn
foreach k in state local {
	gen `k'id = bc`k'id
	replace `k'id = `newsrc_`k'' if birth_y>2012

	* length is measured once, before any trimming or padding
	gen length`k'=strlen(`k'id)
	replace `k'id = substr(`k'id,8,6) if length`k'==13
	replace `k'id = substr("000000",1,6-length`k') + `k'id if length`k'<=5
}

* birth year as text, with 0000 standing in for a missing year
gen stringyear = birth_y
tostring stringyear,force replace
replace stringyear = "0000" if stringyear == "."

gen bid = stringyear + stateid + localid

* Keep the analysis variables (plus the raw birthplace codes and every variable
* whose name ends in id) and overwrite the stacked file with the cleaned data.
keep birth_m birth_d birth_y bdate_mdy ///
	wic ld_medicaid ld_privateins ld_selfpay ld_other pc_medicaid medicaid ///
	mracewhite mraceblack mraceai mraceasian mracepi ///
	fracewhite fraceblack fraceai fraceasian fracepi ///
	mhispanic fhispanic momrace dadrace ///
	apgar5 male female del_vag* del_ces ///
	gestation_days numprenvis prencaremonth preterm* ///
	momage dadage momagebin dadagebin momeduc popeduc childorder ///
	birthweight lbw vlbw elbw ///
	*id stateid localid bid ///
	bthresmb bmbthstate mom_fb mom_mx

save "Births_99_19.dta",replace

